/* GRAMMAR (terminals in quotes)
 *
 * ExtendedRegex -> BasicRegex '\0' | BasicRegex '|' ExtendedRegex
 * BasicRegex -> SimpleRegex | SimpleRegex BasicRegex
 * SimpleRegex -> NoDupRegex | NoDupRegex DupSymbol
 * NoDupRegex -> Term | '(' ExtendedRegex ')'
 * DupSymbol -> '*'
 * Term -> 'a' | 'b' | ... | 'z' | '0' | '1' | ... | '9'
 */

// TODOs
// Graceful exit (return -1) if the regular expression isn't well-formed
// Add header file defining API
// Add support for escaped characters (preceded by a '\')
// Add support for character classes ([:alnum:], [:space:], [:upper:], [:lower:], etc.)
// Add support for '.' (match any char except '\0') '+' (match one or more) '?' (match zero or one) and anchors ('^' and '$')

 #include <ctype.h>
 #include <stdio.h>
 #include <stdlib.h>

#define EPSILON 0

typedef enum specialClasses
{
    SPECIALS_BEGIN  = 0xF0000,  // Beginning of unicode private use area A
    ANY,    // Represents '.', matches any character except '\0'
    ALNUM,  // [:alnum:]
    ALPHA,  // [:alpha:]
    BLANK,  // [:blank:]
    CNTRL,  // [:cntrl:]
    DIGIT,  // [:digit:]
    GRAPH,  // [:graph:]
    LOWER,  // [:lower:]
    PRINT,  // [:print:]
    PUNCT,  // [:punct:]
    SPACE,  // [:space:]
    UPPER,  // [:upper:]
    XDIGIT, // [:xdigit:]
    SPECIALS_END    = 0xFFFFD
} specialClasses;

typedef enum nodeType
{
    // Alternation node: only has two epsilon moves
    ALT,
    // Concatenation node: only has one transition
    CAT
} nodeType;

typedef struct NFA_State NFA_State;

typedef struct altNode
{
    // Both of these are implicitly epsilon moves
    NFA_State *ptr1;
    NFA_State *ptr2;
} altNode;

typedef struct catNode
{
    NFA_State *next;
    int transitionChar;
} catNode;

typedef union node
{
    altNode altNode;
    catNode catNode;
} node;

struct NFA_State
{
    nodeType type;
    node node;
};

typedef struct NFA
{
    NFA_State *start_state;
    // Transitions that are not yet set to a state: uses
    //  the pointer in the Nodes themselves as a linked list
    //  by casting them to (NFA_State**) and setting their
    //  value to the next pointer in the list
    NFA_State **dangling_transitions;
} NFA;

int Regex(const char *regex, const char *str);

static NFA parseExtendedRegex(const char **regex_ptr);
static NFA parseBasicRegex(const char **regex_ptr);
static NFA parseSimpleRegex(const char **regex_ptr);
static NFA parseNoDupRegex(const char **regex_ptr);
static NFA_State* addAcceptState(NFA nfa);

static int simulateNFA(NFA_State *NFA_start, const char *str);

static NFA_State** addState(NFA_State *state, NFA_State **setOfStates, int *setOfStates_size, int *setOfStates_max);

// Handles alternation
static NFA parseExtendedRegex(const char **regex_ptr)
{
    NFA start = parseBasicRegex(regex_ptr);
    if(!start.start_state)
        return start;
    NFA alt;
    NFA_State **next_dangling_transition = 0;
    NFA_State *alt_state = 0;
    while(**regex_ptr == '|')
    {
        printf("|");
        (*regex_ptr)++;
        alt = parseBasicRegex(regex_ptr);
        if(!alt.start_state)
            break;
        alt_state = (NFA_State*)malloc(sizeof(NFA_State));
        alt_state->type = ALT;
        alt_state->node.altNode.ptr1 = start.start_state;
        alt_state->node.altNode.ptr2 = alt.start_state;
        start.start_state = alt_state;
        // Add the new dangling transitions to the list
        next_dangling_transition = start.dangling_transitions;
        while(*next_dangling_transition)
        {
            next_dangling_transition = (NFA_State**)*next_dangling_transition;
        }
        *next_dangling_transition = (NFA_State*)alt.dangling_transitions;
    }

    return start;
}

// Handles concatenation
static NFA parseBasicRegex(const char **regex_ptr)
{
    NFA start = parseSimpleRegex(regex_ptr);
    if(!start.start_state)
        return start;
    NFA cur = start, next;
    NFA_State **next_dangling_transition = 0;
    while(1)
    {
        next = parseSimpleRegex(regex_ptr);
        if(!next.start_state)
            break;
        // Connect the two NFAs
        while(cur.dangling_transitions)
        {
            next_dangling_transition = (NFA_State**)*cur.dangling_transitions;
            *cur.dangling_transitions = next.start_state;
            cur.dangling_transitions = next_dangling_transition;
        }
        cur = next;
    }
    start.dangling_transitions = cur.dangling_transitions;

    return start;
}

// Handles *
static NFA parseSimpleRegex(const char **regex_ptr)
{
    NFA ret = parseNoDupRegex(regex_ptr);
    if(**regex_ptr == '*')
    {
        // Create a new altNode, direct it to the start state, set it
        //  as the new start state, connect the dangling transitions to
        //  it, then add its second pointer as the new dangling transition
        NFA_State *alt_state = (NFA_State*)malloc(sizeof(NFA_State));
        alt_state->type = ALT;
        alt_state->node.altNode.ptr1 = 0;
        alt_state->node.altNode.ptr2 = ret.start_state;
        ret.start_state = alt_state;
        NFA_State **next_dangling_transition;
        while(ret.dangling_transitions)
        {
            next_dangling_transition = (NFA_State**)*ret.dangling_transitions;
            *ret.dangling_transitions = alt_state;
            ret.dangling_transitions = next_dangling_transition;
        }
        ret.dangling_transitions = &alt_state->node.altNode.ptr1;
        printf("*");
        (*regex_ptr)++;

    }

    return ret;
}
// Handles parentheses and expressions of a single character
static NFA parseNoDupRegex(const char **regex_ptr)
{
    NFA ret;
    ret.start_state = 0;
    ret.dangling_transitions = 0;
    if(isalnum(**regex_ptr))
    {
        ret.start_state = (NFA_State*)malloc(sizeof(NFA_State));
        ret.start_state->type = CAT;
        ret.start_state->node.catNode.transitionChar = **regex_ptr;
        ret.start_state->node.catNode.next = 0;
        ret.dangling_transitions = &(ret.start_state->node.catNode.next);
        printf("%c", **regex_ptr);
        (*regex_ptr)++;
    }
    else if(**regex_ptr == '(')
    {
        (*regex_ptr)++;
        ret = parseExtendedRegex(regex_ptr);
        if(**regex_ptr != ')')
        {
            fprintf(stderr, "Expected ')'");
            exit(1);
        }
        (*regex_ptr)++;
    }
    return ret;
}

static NFA_State* addAcceptState(NFA nfa)
{
    NFA_State **next_dangling_transition = 0;
    NFA_State *accept_state = (NFA_State*)malloc(sizeof(NFA_State));
    accept_state->type = CAT;
    accept_state->node.catNode.next = 0;
    accept_state->node.catNode.transitionChar = EPSILON;
    while(nfa.dangling_transitions)
    {
        next_dangling_transition = (NFA_State**)*nfa.dangling_transitions;
        *nfa.dangling_transitions = accept_state;
        nfa.dangling_transitions = next_dangling_transition;
    }
    // Handles the special case of the empty regular expression
    return nfa.start_state ? nfa.start_state : accept_state;
}

static int simulateNFA(NFA_State *NFA_start, const char *str)
{
    int i;
    NFA_State **t;
    NFA_State **curSetOfStates = (NFA_State**)malloc(sizeof(NFA_State*)*2);
    NFA_State **nextSetOfStates = (NFA_State**)malloc(sizeof(NFA_State*)*2);
    int curSetOfStates_size = 0;
    int curSetOfStates_max = 2;
    int nextSetOfStates_size = 0;
    int nextSetOfStates_max = 2;

    curSetOfStates = addState(NFA_start, curSetOfStates, &curSetOfStates_size, &curSetOfStates_max);
    while(*str)
    {
        for(i=0; i < curSetOfStates_size; i++)
        {
            if(*str == curSetOfStates[i]->node.catNode.transitionChar)
            {
                nextSetOfStates = addState(curSetOfStates[i]->node.catNode.next, nextSetOfStates, &nextSetOfStates_size, &nextSetOfStates_max);
            }
        }
        // Swap
        t = curSetOfStates;
        curSetOfStates = nextSetOfStates;
        nextSetOfStates = t;
        curSetOfStates_max ^= nextSetOfStates_max;
        nextSetOfStates_max ^= curSetOfStates_max;
        curSetOfStates_max ^= nextSetOfStates_max;
        curSetOfStates_size = nextSetOfStates_size;
        nextSetOfStates_size = 0;

        str++;
    }
    // Try to find the accept state, which has a null transition
    for(i=0; i < curSetOfStates_size; i++)
    {
        if(curSetOfStates[i]->type == CAT)
        {
            if(curSetOfStates[i]->node.catNode.next == 0)
            {
                return 1;
            }
        }
    }
    return 0;
}

static NFA_State** addState(NFA_State *state, NFA_State **setOfStates, int *setOfStates_size, int *setOfStates_max)
{
    if(state->type == ALT)
    {
        NFA_State *t1 = state->node.altNode.ptr1;
        NFA_State *t2 = state->node.altNode.ptr2;
        // To prevent loops
        state->node.altNode.ptr1 = 0;
        state->node.altNode.ptr2 = 0;
        if(t1)
        {
            setOfStates = addState(t1, setOfStates, setOfStates_size, setOfStates_max);
        }
        if(t2)
        {
            setOfStates = addState(t2, setOfStates, setOfStates_size, setOfStates_max);
        }
        state->node.altNode.ptr1 = t1;
        state->node.altNode.ptr2 = t2;
    }
    else if(state->type == CAT)
    {
        if(*setOfStates_size == *setOfStates_max)
        {
            *setOfStates_max <<= 1;
            setOfStates = (NFA_State**)realloc(setOfStates, sizeof(NFA_State*)*(*setOfStates_max));
        }
        setOfStates[(*setOfStates_size)++] = state;
    }
    return setOfStates;
}

int Regex(const char *regex, const char *str)
{
    NFA nfa = parseExtendedRegex(&regex);
    NFA_State *nfa_start = addAcceptState(nfa);
    printf("\n");
    int ret = simulateNFA(nfa_start, str);
    return ret;
}
